knitr::opts_chunk$set(echo = TRUE, message=F, warning=F, fig.width = 14, fig.height = 5)
library(readr)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(leaflet)
library(rstudioapi)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Set working directory to the place this script is saved:
# Getting the path of your current open file
current_path = rstudioapi::getActiveDocumentContext()$path
setwd(dirname(current_path))
Load our data:
# load data that has the dates the historic districts were designated
# comes from here: https://planning.dc.gov/page/dc-historic-districts
# hd_data <- readr::read_csv("https://docs.google.com/spreadsheets/d/1Ajl1iAS0NRB7vk_UFDveeWzGkwf3tuiDo-zV9_wtzRM/gviz/tq?tqx=out:csv&sheet=data")
hd_data <- readr::read_csv("hd_data/data.csv")
# load the historic district boundary shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::historic-districts/about
hd_shp <- sf::st_read("Historic_Districts/Historic_Districts.shp")
## Reading layer `Historic_Districts' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\Historic_Districts\Historic_Districts.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 73 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4696608 xmax: -8564736 ymax: 4720410
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the 2022 ward shape files
# comes from here: https://opendata.dc.gov/datasets/DCGIS::wards-from-2022/about
ward_shp <- sf::st_read("Wards_from_2022/Wards_from_2022.shp")
## Reading layer `Wards_from_2022' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\Wards_from_2022\Wards_from_2022.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 20 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load the zoning map:
# comes from here: https://opendata.dc.gov/datasets/DCGIS::zoning-boundaries-zoning-regulations-of-2016/about
zone_shp <- sf::st_read("zoning/Zoning_Boundaries_(Zoning_Regulations_of_2016).shp")
## Reading layer `Zoning_Boundaries_(Zoning_Regulations_of_2016)' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\zoning\Zoning_Boundaries_(Zoning_Regulations_of_2016).shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 953 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# load & clean census tract data
# please see https://opendata.dc.gov/datasets/DCGIS::census-tracts-in-1970/about
# for example, for more details on variable names
load_clean_tracts <- function(geo_id_var, black_var, white_var, totpop_var, year) {
# Loads the shapefile, removes unneeded columns, calculates the tract area in meters^2
shp <- sf::st_read(paste0("tract_data/Census_Tracts_in_", year,
"/Census_Tracts_in_", year, ".shp"))
shp <- shp %>%
rename("geo_id" = !!sym(geo_id_var),
"n_black" = !!sym(black_var),
"n_white" = !!sym(white_var),
"n_tot" = !!sym(totpop_var)
) %>%
select("geo_id", starts_with("n_")) %>%
mutate(n_other = n_tot - (n_black + n_white),
year = year)
shp$geo_area_meters <- sf::st_area(shp)
shp <- sf::st_transform(shp, 4326)
return(shp %>% select("year", "geo_id", "n_tot", "n_black", "n_white", "n_other", "geo_area_meters", "geometry"))
}
t60_shp <- load_clean_tracts("GISJOIN", "B58013", "B58011", "CA4001", 1960)
t70_shp <- load_clean_tracts("GISJOIN", "CEB03", "CEB01", "CY7001", 1970)
t80_shp <- load_clean_tracts("GISJOIN", "C9D003", "C9D001", "C7L001", 1980)
t90_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "POPULATION", 1990)
t00_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "TOTAL", 2000)
t10_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2010)
t20_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2020)
gc()
Merge HD data onto HD shapefile, subset to only look at neighborhood HDs:
hd_shp <- dplyr::left_join(x = hd_shp, y = hd_data, by = "UNIQUEID")
hd_shp <- hd_shp[hd_shp$Neighborhood_HD==1,]
Transform shape files to mercator projection:
# convert to mercator projection
zone_shp <- sf::st_transform(zone_shp, 4326)
hd_shp <- sf::st_transform(hd_shp, 4326)
ward_shp <- sf::st_transform(ward_shp, 4326)
Subset the zoning map to just look at residential zones:
# list zones:
zones_list <- sort(unique(zone_shp$ZR16))
housing_zones <- zones_list[grep(x=zones_list, pattern = "^R|^MU")]
# subset
zone_shp <- zone_shp[zone_shp$ZR16 %in% housing_zones,]
# create simplified labels
zone_shp$ZR16_simple <- "Other"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RA-")] <- "Apartment zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^R-")] <- "Residential zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^RF-")] <- "Residential flat zones"
zone_shp$ZR16_simple[grep(x=zone_shp$ZR16, pattern="^MU-")] <- "Mixed use zones"
# show on a map:
factpal <- colorFactor(palette = "Set1", domain = zone_shp$ZR16_simple)
leaflet(zone_shp) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(fillColor = ~factpal(ZR16_simple), # Apply the color function
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~ZR16,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = TRUE
)
) %>%
addLegend(pal = factpal, values = ~ZR16_simple, opacity = 0.7, title = NULL,
position = "bottomright")
Calculate the total amount of residential and MU land in DC:
# need to fix broken geometries:
fix_geo_if_broken <- function(shp) {
if (min(sf::st_is_valid(shp)) == 0) {
print("Fixing geometry...")
return(sf::st_make_valid(shp))
} else {
return(shp)
}
}
zone_shp <- fix_geo_if_broken(zone_shp)
## [1] "Fixing geometry..."
hd_shp <- fix_geo_if_broken(hd_shp)
ward_shp <- fix_geo_if_broken(ward_shp)
t60_shp <- fix_geo_if_broken(t60_shp)
t70_shp <- fix_geo_if_broken(t70_shp)
t80_shp <- fix_geo_if_broken(t80_shp)
t90_shp <- fix_geo_if_broken(t90_shp)
t00_shp <- fix_geo_if_broken(t00_shp)
t10_shp <- fix_geo_if_broken(t10_shp)
t20_shp <- fix_geo_if_broken(t20_shp)
zone_shp$area_meters <- sf::st_area(zone_shp)
zone_shp$area_acres <- as.vector(zone_shp$area_meters * 0.000247105)
total_zone_acres <- sum(zone_shp$area_acres, na.rm=T)
Total land area covered by HDs over time:
# get shape areas:
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_shp$area_acres <- as.vector(hd_shp$area_meters * 0.000247105)
years <- c(1960, 1970, 1980, 1990, 2000, 2010, 2025)
land_areas <- rep(NA, length(years))
counter <- 1
for (year in years) { # Land area covered by HDs by year:
land_area <- sum(hd_shp$area_acres[hd_shp$desig_date < year])
land_areas[counter] <- land_area
counter <- counter + 1
}
p <- data.frame(years, land_areas, round(100*land_areas / total_zone_acres,0))
names(p) <- c("Year",
"Area covered by by Historic Districts, in Acres",
"Percent of 2016 residential zone covered by Neighborhood HD")
plot1 <-
ggplot(p,
aes(x=Year,
y=`Area covered by by Historic Districts, in Acres`)) +
geom_bar(stat = "identity", fill="#0f9535") +
geom_text(aes(label = round(`Area covered by by Historic Districts, in Acres`, 0), vjust = -1.7)) +
ylab("Acres") +
theme_minimal() +
ggtitle('Acres covered by "neighborhood historic districts" has steadily increased over time')
plot2 <-
ggplot(p,
aes(x=Year,
y=`Percent of 2016 residential zone covered by Neighborhood HD`)) +
geom_line(color="#0f9535", size=.75) +
geom_point(color="#0f9535") +
geom_text(aes(label = paste0(`Percent of 2016 residential zone covered by Neighborhood HD`, "%")), vjust = -1.7) +
theme_minimal() +
ylab("Percent") +
ggtitle('And the % of residential area covered by HDs has more than doubled since 1980')
plot1 + plot2
Let’s quickly compare which HDs were designated before vs after 1980:
hd_shp$flag_1980 <- 0
hd_shp$flag_1980[hd_shp$desig_date < 1980] <- 1
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(group= "Designated before 1980",
data=hd_shp[hd_shp$flag_1980==1,],
fillColor = "skyblue",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "Designated after 1980",
data=hd_shp[hd_shp$flag_1980==0,],
fillColor = "hotpink",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addLayersControl(
overlayGroups = c("Designated before 1980", "Designated after 1980"),
options = layersControlOptions(collapsed = FALSE)
)
Now let’s look and see how HD changed over time, in terms of % black residents and % white residents, compared to nearby neighborhoods (tracts) that were not in HDs.
This will have a few steps:
# drop some very small HDs that are like a single circle
# just dropped anything smaller than 10 acres
hd_shp <-
hd_shp %>%
filter(!(LABEL %in% c("Emerald St HD",
"Grant Rd HD",
"Mount Vernon Triangle HD",
"Grant Circle HD",
"Union Market"
)))
get_geos_in_hd <- function(shp, min_pct, year) {
# get the tracts that are in each HD in a given year
i <- sf::st_intersection(x=shp, y=hd_shp)
i$i_area <- sf::st_area(i)
i$pct_of_geo_area <- as.vector(i$i_area / i$geo_area_meters)
geos_in_hd <- i[i$pct_of_geo_area > min_pct,
c("year", "geo_id", "LABEL",
"n_tot", "n_black", "n_white", "n_other",
"desig_date", "pct_of_geo_area")]
geos_in_hd_summary <-
geos_in_hd %>%
mutate(n_tot_prorated = n_tot * pct_of_geo_area,
n_black_prorated = n_black * pct_of_geo_area,
n_white_prorated = n_white * pct_of_geo_area,
n_other_prorated = n_other * pct_of_geo_area) %>%
select("year", "LABEL", "geo_id", starts_with("n_"), "desig_date") %>%
group_by(LABEL) %>%
summarise(n_tot = sum(n_tot, na.rm=T),
n_black = sum(n_black, na.rm=T),
n_white = sum(n_white, na.rm=T),
n_other = sum(n_other, na.rm=T),
n_tot_prorated = sum(n_tot_prorated, na.rm=T),
n_black_prorated = sum(n_black_prorated, na.rm=T),
n_white_prorated = sum(n_white_prorated, na.rm=T),
n_other_prorated = sum(n_other_prorated, na.rm=T),
desig_year = max(desig_date, na.rm=T)) %>%
mutate(desig_yet = ifelse(desig_year<year, 1, 0),
year = year)
rv = list("geos_in_hd"=geos_in_hd, "summary"=sf::st_drop_geometry(geos_in_hd_summary))
return(rv)
}
# turn off spherical geometry (s2)
sf_use_s2(FALSE)
# ranging the min % between .2 and .6 seems to give reasonable results
mp = 0.25 # TODO: run analysis varying this factor
hd_geos60 <- get_geos_in_hd(t60_shp, min_pct = mp, year = 1960)
hd_geos70 <- get_geos_in_hd(t70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(t80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(t90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(t00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(t10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(t20_shp, min_pct = mp, year = 2020)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1473426 78.7 2423300 129.5 2423300 129.5
## Vcells 4398425 33.6 10146329 77.5 10146321 77.5
Plot where the tracts “inside” the HDs were for 2020:
plot_geos_in_hds <- function(shp, geos_in_hds) {
rv <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(group= "tracts labeled as in HDs",
data=shp[shp$geo_id %in% geos_in_hds, ],
fillColor = "skyblue",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~geo_id,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "HDs",
data=hd_shp,
fillColor = "hotpink",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addLayersControl(
overlayGroups = c("tracts labeled as in HDs", "HDs"),
options = layersControlOptions(collapsed = FALSE)
)
return(rv)
}
plot_geos_in_hds(t60_shp, hd_geos60$geos_in_hd$geo_id)
plot_geos_in_hds(t70_shp, hd_geos70$geos_in_hd$geo_id)
plot_geos_in_hds(t80_shp, hd_geos80$geos_in_hd$geo_id)
plot_geos_in_hds(t90_shp, hd_geos90$geos_in_hd$geo_id)
plot_geos_in_hds(t00_shp, hd_geos00$geos_in_hd$geo_id)
plot_geos_in_hds(t10_shp, hd_geos10$geos_in_hd$geo_id)
plot_geos_in_hds(t20_shp, hd_geos20$geos_in_hd$geo_id)
Now get a list of neighboring tracts:
get_neighbor_geos <- function(hd_shp, geo_shp, buffer_dist, geos_in_hd, remove_geo_thresh, year) {
# first make a buffer around the HDs
b <- sf::st_buffer(hd_shp, dist = buffer_dist)
# then get the intersection of the buffer and the tracts
i <- sf::st_intersection(x=geo_shp, y=b)
# remove tracts that have already been classified as within an HD
i <- i[!(i$geo_id %in% geos_in_hd$geos_in_hd$geo_id),]
# also remove any tracts/blocks for which more than X% of an HD is in that tract/block
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_geo <- sf::st_intersection(x=geo_shp, y=hd_shp)
hd_geo$intersect_area <- sf::st_area(hd_geo)
hd_geo$pct_area <- as.vector(hd_geo$intersect_area / hd_geo$area_meters)
geos_to_remove <- hd_geo$geo_id[hd_geo$pct_area > remove_geo_thresh]
neighboring_geos <- i[!(i$geo_id %in% geos_to_remove),]
# finally, remove
neighboring_geos <- geo_shp[geo_shp$geo_id %in% neighboring_geos$geo_id,]
neighboring_geos <- dplyr::left_join(neighboring_geos,
sf::st_drop_geometry(i[,c("geo_id", "LABEL", "desig_date")]),
by="geo_id")
neighbor_geos_summary <-
sf::st_drop_geometry(neighboring_geos) %>%
group_by(LABEL) %>%
summarise(n_tot = sum(n_tot, na.rm=T),
n_black = sum(n_black, na.rm=T),
n_white = sum(n_white, na.rm=T),
n_other = sum(n_other, na.rm=T),
desig_year = max(desig_date, na.rm=T)) %>%
mutate(desig_yet = ifelse(desig_year<year, 1, 0),
year = year)
rv = list("buffers" = b,
"neighbor_geos" = neighboring_geos,
"neighbor_geos_summary" = neighbor_geos_summary)
return(rv)
}
buff_dist = .005
threshold = .1
nearby_tracts60 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t60_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1960)
nearby_tracts70 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_tracts80 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_tracts90 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1474684 78.8 2423300 129.5 2423300 129.5
## Vcells 4855403 37.1 10146329 77.5 10146321 77.5
nearby_tracts00 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_tracts10 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_tracts20 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=t20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1475767 78.9 2423300 129.5 2423300 129.5
## Vcells 4913086 37.5 10146329 77.5 10146321 77.5
plot_neighbor_geos <- function(shp) {
rv <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(group='buffers', data=shp[["buffers"]]) %>%
addPolygons(group= "tracts labeled as near HDs",
data=shp[["neighbor_geos"]],
fillColor = "skyblue",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~paste0("Tract: ", geo_id, "; HD neighbor: ", LABEL),
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addPolygons(group= "HDs",
data=hd_shp[hd_shp$desig_date < 2025,],
fillColor = "hotpink",
fillOpacity = 0.7,
weight = 1,
opacity = 1,
color = "white",
label=~LABEL,
highlightOptions = highlightOptions(weight = 3,
color = "white",
bringToFront = FALSE
)
) %>%
addLayersControl(
overlayGroups = c("tracts labeled as near HDs", "HDs", "buffers"),
options = layersControlOptions(collapsed = FALSE)
)
return(rv)
}
plot_neighbor_geos(nearby_tracts60)
plot_neighbor_geos(nearby_tracts20)
Now compare the demographics of the HD tracts and their neighbors in each year:
hd_comp_df <- dplyr::bind_rows(hd_geos60[[2]], hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)
near_comp_df <- dplyr::bind_rows(nearby_tracts60[[3]], nearby_tracts70[[3]],nearby_tracts80[[3]], nearby_tracts90[[3]],
nearby_tracts00[[3]], nearby_tracts10[[3]], nearby_tracts20[[3]],)
hd_comp_df <-
hd_comp_df %>%
select(-ends_with("_prorated"), -desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
summarise_all(., sum) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
rename_with(~ paste0(., "_hd"))
near_comp_df <-
near_comp_df %>%
select(-desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
summarise_all(., sum) %>%
rename_with(~ paste0(., "_near"))
comp_df <- dplyr::full_join(x = hd_comp_df,
y = near_comp_df,
by = c("LABEL_hd"="LABEL_near",
"desig_yet_hd"="desig_yet_near",
"year_hd"="year_near")) %>%
select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))
comp_df <-
comp_df %>%
tidyr::pivot_longer(
cols = starts_with("pct_"),
names_to = c("group", "treatment_control"),
names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
values_to = "percent")
# remove HDs that are always HDs or always not HDs during the timeframe
comp_df <-
comp_df %>%
group_by(LABEL_hd) %>%
mutate(mean_status = mean(desig_yet_hd, na.rm=T)) %>%
filter(mean_status > 0) %>%
filter(mean_status < 1)
comp_df$treated <- 0
comp_df$treated[comp_df$treatment_control=="hd"] <- 1
# diff in diff analysis for change in the % of black residents
diff_in_diff <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
as.factor(LABEL_hd) + # fixed effect for HD area
as.factor(year_hd), # fixed effect for year
data = comp_df[comp_df$group=="black",])
summary(diff_in_diff)
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group ==
## "black", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51441 -0.08561 0.00642 0.10008 0.32372
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.982139 0.050620 19.402
## treated -0.032361 0.028279 -1.144
## desig_yet_hd -0.045030 0.034295 -1.313
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD -0.179310 0.071455 -2.509
## as.factor(LABEL_hd)Bloomingdale HD -0.168882 0.061650 -2.739
## as.factor(LABEL_hd)Capitol Hill HD -0.346544 0.058854 -5.888
## as.factor(LABEL_hd)Cleveland Park HD -0.820924 0.059037 -13.905
## as.factor(LABEL_hd)Downtown HD -0.247801 0.072154 -3.434
## as.factor(LABEL_hd)Dupont Circle HD -0.763644 0.059926 -12.743
## as.factor(LABEL_hd)Financial HD -0.613816 0.154058 -3.984
## as.factor(LABEL_hd)Foggy Bottom HD -0.886229 0.074670 -11.869
## as.factor(LABEL_hd)Foxhall HD -0.933840 0.072154 -12.942
## as.factor(LABEL_hd)Georgetown HD -0.842525 0.059026 -14.274
## as.factor(LABEL_hd)Greater 14th St HD -0.443055 0.072443 -6.116
## as.factor(LABEL_hd)Greater U St HD -0.239301 0.059571 -4.017
## as.factor(LABEL_hd)GWU / Old West End HD -0.900888 0.076070 -11.843
## as.factor(LABEL_hd)Kalorama Triangle HD -0.692670 0.059037 -11.733
## as.factor(LABEL_hd)Kingman Park HD -0.052059 0.062752 -0.830
## as.factor(LABEL_hd)Lafayette Square HD -0.911952 0.079354 -11.492
## as.factor(LABEL_hd)LeDroit Park HD -0.152142 0.071058 -2.141
## as.factor(LABEL_hd)Logan Circle HD -0.209050 0.074684 -2.799
## as.factor(LABEL_hd)Massachusetts Ave HD -0.881518 0.074619 -11.814
## as.factor(LABEL_hd)Meridian Hill -0.408705 0.070015 -5.837
## as.factor(LABEL_hd)Mt. Pleasant HD -0.491427 0.059037 -8.324
## as.factor(LABEL_hd)Mt. Vernon Square HD -0.136578 0.060485 -2.258
## as.factor(LABEL_hd)Pennsylvania Ave NHS -0.426778 0.066439 -6.424
## as.factor(LABEL_hd)Shaw HD -0.196403 0.059571 -3.297
## as.factor(LABEL_hd)Sheridan-Kalorama HD -0.776764 0.059037 -13.157
## as.factor(LABEL_hd)Sixteenth St HD -0.250765 0.071058 -3.529
## as.factor(LABEL_hd)Strivers' Section HD -0.513097 0.075310 -6.813
## as.factor(LABEL_hd)Takoma Park HD -0.281888 0.071088 -3.965
## as.factor(LABEL_hd)Washington Heights HD -0.693408 0.063764 -10.875
## as.factor(LABEL_hd)Woodley Park HD -0.807681 0.071455 -11.303
## as.factor(year_hd)1970 0.079270 0.032357 2.450
## as.factor(year_hd)1980 0.086808 0.033812 2.567
## as.factor(year_hd)1990 0.051707 0.036592 1.413
## as.factor(year_hd)2000 0.008481 0.040492 0.209
## as.factor(year_hd)2010 -0.110781 0.043595 -2.541
## as.factor(year_hd)2020 -0.196117 0.045568 -4.304
## treated:desig_yet_hd -0.098483 0.036399 -2.706
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## treated 0.253546
## desig_yet_hd 0.190355
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.012712 *
## as.factor(LABEL_hd)Bloomingdale HD 0.006589 **
## as.factor(LABEL_hd)Capitol Hill HD 1.22e-08 ***
## as.factor(LABEL_hd)Cleveland Park HD < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD 0.000693 ***
## as.factor(LABEL_hd)Dupont Circle HD < 2e-16 ***
## as.factor(LABEL_hd)Financial HD 8.83e-05 ***
## as.factor(LABEL_hd)Foggy Bottom HD < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD < 2e-16 ***
## as.factor(LABEL_hd)Georgetown HD < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD 3.57e-09 ***
## as.factor(LABEL_hd)Greater U St HD 7.75e-05 ***
## as.factor(LABEL_hd)GWU / Old West End HD < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD 0.407540
## as.factor(LABEL_hd)Lafayette Square HD < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD 0.033210 *
## as.factor(LABEL_hd)Logan Circle HD 0.005514 **
## as.factor(LABEL_hd)Massachusetts Ave HD < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill 1.60e-08 ***
## as.factor(LABEL_hd)Mt. Pleasant HD 5.14e-15 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD 0.024786 *
## as.factor(LABEL_hd)Pennsylvania Ave NHS 6.44e-10 ***
## as.factor(LABEL_hd)Shaw HD 0.001116 **
## as.factor(LABEL_hd)Sheridan-Kalorama HD < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD 0.000494 ***
## as.factor(LABEL_hd)Strivers' Section HD 6.80e-11 ***
## as.factor(LABEL_hd)Takoma Park HD 9.52e-05 ***
## as.factor(LABEL_hd)Washington Heights HD < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD < 2e-16 ***
## as.factor(year_hd)1970 0.014960 *
## as.factor(year_hd)1980 0.010816 *
## as.factor(year_hd)1990 0.158856
## as.factor(year_hd)2000 0.834269
## as.factor(year_hd)2010 0.011640 *
## as.factor(year_hd)2020 2.39e-05 ***
## treated:desig_yet_hd 0.007275 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1457 on 256 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.8519, Adjusted R-squared: 0.8294
## F-statistic: 37.77 on 39 and 256 DF, p-value: < 2.2e-16
# diff in diff analysis for change in the % of white residents
diff_in_diff <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
as.factor(LABEL_hd) + # fixed effect for HD area
as.factor(year_hd), # fixed effect for year
data = comp_df[comp_df$group=="white",])
summary(diff_in_diff)
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group ==
## "white", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31063 -0.08385 -0.01052 0.07917 0.45470
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.096334 0.047168 2.042
## treated 0.021254 0.026351 0.807
## desig_yet_hd 0.028035 0.031956 0.877
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.106677 0.066583 1.602
## as.factor(LABEL_hd)Bloomingdale HD 0.121217 0.057446 2.110
## as.factor(LABEL_hd)Capitol Hill HD 0.309114 0.054841 5.637
## as.factor(LABEL_hd)Cleveland Park HD 0.741173 0.055011 13.473
## as.factor(LABEL_hd)Downtown HD 0.171784 0.067234 2.555
## as.factor(LABEL_hd)Dupont Circle HD 0.656760 0.055839 11.762
## as.factor(LABEL_hd)Financial HD 0.597421 0.143552 4.162
## as.factor(LABEL_hd)Foggy Bottom HD 0.769557 0.069578 11.060
## as.factor(LABEL_hd)Foxhall HD 0.854456 0.067234 12.709
## as.factor(LABEL_hd)Georgetown HD 0.773753 0.055001 14.068
## as.factor(LABEL_hd)Greater 14th St HD 0.324464 0.067503 4.807
## as.factor(LABEL_hd)Greater U St HD 0.148381 0.055509 2.673
## as.factor(LABEL_hd)GWU / Old West End HD 0.778624 0.070883 10.985
## as.factor(LABEL_hd)Kalorama Triangle HD 0.585505 0.055011 10.643
## as.factor(LABEL_hd)Kingman Park HD 0.028819 0.058473 0.493
## as.factor(LABEL_hd)Lafayette Square HD 0.795959 0.073943 10.765
## as.factor(LABEL_hd)LeDroit Park HD 0.079301 0.066212 1.198
## as.factor(LABEL_hd)Logan Circle HD 0.142204 0.069591 2.043
## as.factor(LABEL_hd)Massachusetts Ave HD 0.770931 0.069531 11.088
## as.factor(LABEL_hd)Meridian Hill 0.230193 0.065241 3.528
## as.factor(LABEL_hd)Mt. Pleasant HD 0.323962 0.055011 5.889
## as.factor(LABEL_hd)Mt. Vernon Square HD 0.061670 0.056361 1.094
## as.factor(LABEL_hd)Pennsylvania Ave NHS 0.295259 0.061908 4.769
## as.factor(LABEL_hd)Shaw HD 0.096201 0.055509 1.733
## as.factor(LABEL_hd)Sheridan-Kalorama HD 0.668434 0.055011 12.151
## as.factor(LABEL_hd)Sixteenth St HD 0.141060 0.066212 2.130
## as.factor(LABEL_hd)Strivers' Section HD 0.422460 0.070175 6.020
## as.factor(LABEL_hd)Takoma Park HD 0.201627 0.066241 3.044
## as.factor(LABEL_hd)Washington Heights HD 0.566134 0.059416 9.528
## as.factor(LABEL_hd)Woodley Park HD 0.699163 0.066583 10.501
## as.factor(year_hd)1970 -0.086209 0.030150 -2.859
## as.factor(year_hd)1980 -0.117891 0.031507 -3.742
## as.factor(year_hd)1990 -0.096791 0.034097 -2.839
## as.factor(year_hd)2000 -0.156409 0.037731 -4.145
## as.factor(year_hd)2010 0.007534 0.040622 0.185
## as.factor(year_hd)2020 0.010962 0.042460 0.258
## treated:desig_yet_hd 0.105528 0.033917 3.111
## Pr(>|t|)
## (Intercept) 0.042141 *
## treated 0.420664
## desig_yet_hd 0.381145
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.110350
## as.factor(LABEL_hd)Bloomingdale HD 0.035820 *
## as.factor(LABEL_hd)Capitol Hill HD 4.57e-08 ***
## as.factor(LABEL_hd)Cleveland Park HD < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD 0.011197 *
## as.factor(LABEL_hd)Dupont Circle HD < 2e-16 ***
## as.factor(LABEL_hd)Financial HD 4.32e-05 ***
## as.factor(LABEL_hd)Foggy Bottom HD < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD < 2e-16 ***
## as.factor(LABEL_hd)Georgetown HD < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD 2.62e-06 ***
## as.factor(LABEL_hd)Greater U St HD 0.007999 **
## as.factor(LABEL_hd)GWU / Old West End HD < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD 0.622528
## as.factor(LABEL_hd)Lafayette Square HD < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD 0.232149
## as.factor(LABEL_hd)Logan Circle HD 0.042035 *
## as.factor(LABEL_hd)Massachusetts Ave HD < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill 0.000496 ***
## as.factor(LABEL_hd)Mt. Pleasant HD 1.22e-08 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD 0.274891
## as.factor(LABEL_hd)Pennsylvania Ave NHS 3.11e-06 ***
## as.factor(LABEL_hd)Shaw HD 0.084287 .
## as.factor(LABEL_hd)Sheridan-Kalorama HD < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD 0.034091 *
## as.factor(LABEL_hd)Strivers' Section HD 6.01e-09 ***
## as.factor(LABEL_hd)Takoma Park HD 0.002579 **
## as.factor(LABEL_hd)Washington Heights HD < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD < 2e-16 ***
## as.factor(year_hd)1970 0.004596 **
## as.factor(year_hd)1980 0.000226 ***
## as.factor(year_hd)1990 0.004893 **
## as.factor(year_hd)2000 4.62e-05 ***
## as.factor(year_hd)2010 0.853017
## as.factor(year_hd)2020 0.796475
## treated:desig_yet_hd 0.002073 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1358 on 256 degrees of freedom
## (116 observations deleted due to missingness)
## Multiple R-squared: 0.8459, Adjusted R-squared: 0.8224
## F-statistic: 36.03 on 39 and 256 DF, p-value: < 2.2e-16
plot_ly(comp_df[comp_df$group=="black",] %>% arrange(LABEL_hd, year_hd),
x = ~year_hd,
y = ~percent,
linetype = ~as.factor(treatment_control),
hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
color = ~LABEL_hd, # Group lines by this variable
type = "scatter",
mode = "lines+markers") %>%
layout(title = "% black residents in each historic district areas before and after HD created")
plot_ly(comp_df[comp_df$group=="white",] %>% arrange(LABEL_hd, year_hd),
x = ~year_hd,
y = ~percent,
linetype = ~as.factor(treatment_control),
hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
color = ~LABEL_hd, # Group lines by this variable
type = "scatter",
mode = "lines+markers") %>%
layout(title = "% white residents in each historic district areas before and after HD created")
Now we’re going to repeat all of that, but using block groups instead of tracts. We will have to rely on sightly different versions of the Census data to do this, downloaded from NHGIS rather than Open Data DC. We’re also only going back to 1970, since that’s the data I could find relatively easily.
Load and clean block data:
# b70_shp <- sf::st_read("block_shapes/US_block_1970/US_block_1970.shp")
# b70_shp <- b70_shp[b70_shp$STATE70=="11",]
# sf::st_write(b70_shp, "block_shapes/DC_block_1970/DC_block_1970.shp")
b70_shp <- sf::st_read("block_shapes/DC_block_1970/DC_block_1970.shp")
## Reading layer `DC_block_1970' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1970\DC_block_1970.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4665 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b80_shp <- sf::st_read("block_shapes/DC_block_1980/DC_block_1980.shp")
## Reading layer `DC_block_1980' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1980\DC_block_1980.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 4627 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b90_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_1990/DC_block_1990.shp")
## Reading layer `DC_block_1990' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_1990\DC_block_1990.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5140 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b00_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_2000/DC_block_2000.shp")
## Reading layer `DC_block_2000' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_2000\DC_block_2000.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5626 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b10_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2010_110_block_2010/DC_block_2010.shp")
## Reading layer `DC_block_2010' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2010_110_block_2010\DC_block_2010.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 6426 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329361
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b20_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2020_110_block_2020/DC_block_2020.shp")
## Reading layer `DC_block_2020' from data source
## `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2020_110_block_2020\DC_block_2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5935 features and 18 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329396
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
b70_df <- readr::read_csv("block_data/nhgis0093_ds96_1970_block.csv")
b80_df <- readr::read_csv("block_data/nhgis_ds104_1980_block_11.csv")
b90_df <- readr::read_csv("block_data/nhgis0092_ds120_1990_block.csv")
b00_df <- readr::read_csv("block_data/nhgis0092_ds147_2000_block.csv")
b10_df <- readr::read_csv("block_data/nhgis0092_ds172_2010_block.csv")
b20_df <- readr::read_csv("block_data/nhgis0092_ds258_2020_block.csv")
clean_block_data <- function(shp, df, shp_b_id, df_b_id, var_prefix, df_n_black, df_n_white, drop_var="", year) {
shp <- shp %>% select(!!sym(shp_b_id)) %>% rename("geo_id" = !!sym(shp_b_id))
if (drop_var!="") {df <- df %>% select(-!!sym(drop_var))}
df <- df %>%
select(!!sym(df_b_id), starts_with(var_prefix)) %>%
rowwise() %>%
mutate(n_tot = sum(c_across(starts_with(var_prefix))))
df <- df %>%
rename("geo_id" = !!sym(df_b_id),
"n_black" = !!sym(df_n_black),
"n_white" = !!sym(df_n_white)) %>%
select(-starts_with(var_prefix)) %>%
mutate(n_other = n_tot - (n_black + n_white))
shp <- dplyr::left_join(shp, df, by="geo_id")
shp <- sf::st_transform(shp, 4326)
shp$geo_area_meters <- sf::st_area(shp)
shp$year <- year
return(shp)
}
# 1970 block data has to be specially cleaned, see https://forum.ipums.org/t/race-ethnicity-data-at-a-block-level-from-1970/6178
b70_df$c_black <- b70_df$CM6001 + b70_df$CM6002
b70_df$c_other <- b70_df$CM6003 + b70_df$CM6004
b70_df$c_white <- b70_df$CM5001 + b70_df$CM5002 - b70_df$c_black - b70_df$c_other
b70_shp <- clean_block_data(b70_shp, b70_df, "GISJOIN", "GISJOIN", "c_", "c_black", "c_white", year=1970)
b80_shp <- clean_block_data(b80_shp, b80_df, "GISJOIN", "GISJOIN", "C9D0", "C9D002", "C9D001", year=1980)
b90_shp <- clean_block_data(b90_shp, b90_df, "GISJOIN", "GISJOIN", "EUY0", "EUY002", "EUY001", year=1990)
b00_shp <- clean_block_data(b00_shp, b00_df, "GISJOIN", "GISJOIN", "FYE0", "FYE002", "FYE001", year=2000)
b10_shp <- clean_block_data(b10_shp, b10_df, "GISJOIN", "GISJOIN", "H7X", "H7X003", "H7X002", "H7X001", year=2010)
b20_shp <- clean_block_data(b20_shp, b20_df, "GISJOIN", "GISJOIN", "U7J", "U7J003", "U7J002", "U7J001", year=2020)
rm(b70_df, b80_df, b90_df, b00_df, b10_df, b20_df)
plot(sf::st_geometry(b70_shp["geo_id"]))
Get the blocks in the HDs:
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1889412 101.0 4181665 223.4 3055264 163.2
## Vcells 7357919 56.2 18442415 140.8 14397296 109.9
mp = 0.25 # TODO: run analysis varying this factor
hd_geos70 <- get_geos_in_hd(b70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(b80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(b90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(b00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(b10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(b20_shp, min_pct = mp, year = 2020)
plot_geos_in_hds(b70_shp, hd_geos70$geos_in_hd$geo_id)
Get the blocks neighboring the HDs:
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1930122 103.1 4181665 223.4 4181665 223.4
## Vcells 7673593 58.6 18442415 140.8 14397296 109.9
buff_dist = .005
threshold = .1
nearby_blocks70 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b70_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos70,threshold, 1970)
nearby_blocks80 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b80_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos80,threshold, 1980)
nearby_blocks90 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b90_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos90,threshold, 1990)
nearby_blocks00 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b00_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos00,threshold, 2000)
nearby_blocks10 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b10_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos10,threshold, 2010)
nearby_blocks20 <-
get_neighbor_geos(hd_shp=hd_shp, geo_shp=b20_shp, buffer_dist=buff_dist, geos_in_hd=hd_geos20,threshold, 2020)
plot_neighbor_geos(nearby_blocks70)
Now compare the demographics of the HD blocks and their neighbors in each year:
hd_comp_df <- dplyr::bind_rows(hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]],)
near_comp_df <- dplyr::bind_rows(nearby_blocks70[[3]], nearby_blocks80[[3]], nearby_blocks90[[3]],
nearby_blocks00[[3]], nearby_blocks10[[3]], nearby_blocks20[[3]],)
hd_comp_df <-
hd_comp_df %>%
select(-ends_with("_prorated"), -desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
summarise_all(., sum) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
rename_with(~ paste0(., "_hd"))
near_comp_df <-
near_comp_df %>%
select(-desig_year) %>%
group_by(LABEL, desig_yet, year) %>%
mutate(pct_black = n_black / n_tot,
pct_white = n_white / n_tot) %>%
summarise_all(., sum) %>%
rename_with(~ paste0(., "_near"))
comp_df <- dplyr::full_join(x = hd_comp_df,
y = near_comp_df,
by = c("LABEL_hd"="LABEL_near",
"desig_yet_hd"="desig_yet_near",
"year_hd"="year_near")) %>%
select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))
comp_df <-
comp_df %>%
tidyr::pivot_longer(
cols = starts_with("pct_"),
names_to = c("group", "treatment_control"),
names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
values_to = "percent")
# remove HDs that are always HDs or always not HDs during the timeframe
comp_df <-
comp_df %>%
group_by(LABEL_hd) %>%
mutate(mean_status = mean(desig_yet_hd, na.rm=T)) %>%
filter(mean_status > 0) %>%
filter(mean_status < 1)
comp_df$treated <- 0
comp_df$treated[comp_df$treatment_control=="hd"] <- 1
# diff in diff analysis for change in the % of black residents
diff_in_diff <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
as.factor(LABEL_hd) + # fixed effect for HD area
as.factor(year_hd), # fixed effect for year
data = comp_df[comp_df$group=="black",])
summary(diff_in_diff)
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group ==
## "black", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36570 -0.10361 -0.00381 0.08492 0.40910
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.127628 0.046566 24.216
## treated -0.081062 0.023017 -3.522
## desig_yet_hd -0.037863 0.031670 -1.196
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD -0.418740 0.059032 -7.093
## as.factor(LABEL_hd)Bloomingdale HD -0.176876 0.061167 -2.892
## as.factor(LABEL_hd)Capitol Hill HD -0.437248 0.058304 -7.499
## as.factor(LABEL_hd)Cleveland Park HD -0.891329 0.058487 -15.240
## as.factor(LABEL_hd)Downtown HD -0.599615 0.059931 -10.005
## as.factor(LABEL_hd)Dupont Circle HD -0.801907 0.058304 -13.754
## as.factor(LABEL_hd)Financial HD -0.686026 0.059849 -11.463
## as.factor(LABEL_hd)Foggy Bottom HD -0.895844 0.058487 -15.317
## as.factor(LABEL_hd)Foxhall HD -0.944705 0.059931 -15.763
## as.factor(LABEL_hd)Greater 14th St HD -0.509427 0.059032 -8.630
## as.factor(LABEL_hd)Greater U St HD -0.328614 0.059032 -5.567
## as.factor(LABEL_hd)GWU / Old West End HD -0.925058 0.061167 -15.124
## as.factor(LABEL_hd)Kalorama Triangle HD -0.767472 0.058487 -13.122
## as.factor(LABEL_hd)Kingman Park HD -0.063425 0.061167 -1.037
## as.factor(LABEL_hd)Lafayette Square HD -0.849017 0.061223 -13.868
## as.factor(LABEL_hd)LeDroit Park HD -0.140810 0.058304 -2.415
## as.factor(LABEL_hd)Logan Circle HD -0.450672 0.058304 -7.730
## as.factor(LABEL_hd)Massachusetts Ave HD -0.867119 0.058304 -14.872
## as.factor(LABEL_hd)Meridian Hill -0.495563 0.061167 -8.102
## as.factor(LABEL_hd)Mt. Pleasant HD -0.527034 0.058487 -9.011
## as.factor(LABEL_hd)Mt. Vernon Square HD -0.258366 0.059032 -4.377
## as.factor(LABEL_hd)Pennsylvania Ave NHS -0.502061 0.059032 -8.505
## as.factor(LABEL_hd)Shaw HD -0.399129 0.059032 -6.761
## as.factor(LABEL_hd)Sheridan-Kalorama HD -0.896236 0.058487 -15.324
## as.factor(LABEL_hd)Sixteenth St HD -0.550494 0.058304 -9.442
## as.factor(LABEL_hd)Strivers' Section HD -0.516453 0.058487 -8.830
## as.factor(LABEL_hd)Takoma Park HD -0.318863 0.058487 -5.452
## as.factor(LABEL_hd)Washington Heights HD -0.672527 0.059931 -11.222
## as.factor(LABEL_hd)Woodley Park HD -0.894963 0.059032 -15.161
## as.factor(year_hd)1980 0.005156 0.027103 0.190
## as.factor(year_hd)1990 -0.029076 0.029979 -0.970
## as.factor(year_hd)2000 -0.089720 0.033808 -2.654
## as.factor(year_hd)2010 -0.201509 0.035602 -5.660
## as.factor(year_hd)2020 -0.250573 0.038071 -6.582
## treated:desig_yet_hd -0.058482 0.030542 -1.915
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## treated 0.000491 ***
## desig_yet_hd 0.232763
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 8.49e-12 ***
## as.factor(LABEL_hd)Bloomingdale HD 0.004095 **
## as.factor(LABEL_hd)Capitol Hill HD 6.40e-13 ***
## as.factor(LABEL_hd)Cleveland Park HD < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD < 2e-16 ***
## as.factor(LABEL_hd)Dupont Circle HD < 2e-16 ***
## as.factor(LABEL_hd)Financial HD < 2e-16 ***
## as.factor(LABEL_hd)Foggy Bottom HD < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD 2.96e-16 ***
## as.factor(LABEL_hd)Greater U St HD 5.51e-08 ***
## as.factor(LABEL_hd)GWU / Old West End HD < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD 0.300559
## as.factor(LABEL_hd)Lafayette Square HD < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD 0.016292 *
## as.factor(LABEL_hd)Logan Circle HD 1.42e-13 ***
## as.factor(LABEL_hd)Massachusetts Ave HD < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill 1.16e-14 ***
## as.factor(LABEL_hd)Mt. Pleasant HD < 2e-16 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD 1.64e-05 ***
## as.factor(LABEL_hd)Pennsylvania Ave NHS 7.15e-16 ***
## as.factor(LABEL_hd)Shaw HD 6.53e-11 ***
## as.factor(LABEL_hd)Sheridan-Kalorama HD < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD < 2e-16 ***
## as.factor(LABEL_hd)Strivers' Section HD < 2e-16 ***
## as.factor(LABEL_hd)Takoma Park HD 1.00e-07 ***
## as.factor(LABEL_hd)Washington Heights HD < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD < 2e-16 ***
## as.factor(year_hd)1980 0.849244
## as.factor(year_hd)1990 0.332850
## as.factor(year_hd)2000 0.008356 **
## as.factor(year_hd)2010 3.37e-08 ***
## as.factor(year_hd)2020 1.91e-10 ***
## treated:desig_yet_hd 0.056407 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1428 on 319 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8353, Adjusted R-squared: 0.8162
## F-statistic: 43.71 on 37 and 319 DF, p-value: < 2.2e-16
# diff in diff analysis for change in the % of white residents
diff_in_diff <- lm(percent ~ # outcome: % of white or black residents
treated + # "treatment": whether tract is in a HD or not
desig_yet_hd + # pre/post indicator: whether HD was designated yet
treated:desig_yet_hd + # D-in-D estimator: effect of treatment after implemented
as.factor(LABEL_hd) + # fixed effect for HD area
as.factor(year_hd), # fixed effect for year
data = comp_df[comp_df$group=="white",])
summary(diff_in_diff)
##
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:desig_yet_hd +
## as.factor(LABEL_hd) + as.factor(year_hd), data = comp_df[comp_df$group ==
## "white", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35371 -0.08122 -0.00300 0.09326 0.42613
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.05391 0.04455 -1.210
## treated 0.06904 0.02202 3.135
## desig_yet_hd 0.02159 0.03030 0.712
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 0.27904 0.05648 4.941
## as.factor(LABEL_hd)Bloomingdale HD 0.14035 0.05852 2.398
## as.factor(LABEL_hd)Capitol Hill HD 0.40590 0.05578 7.276
## as.factor(LABEL_hd)Cleveland Park HD 0.81521 0.05596 14.568
## as.factor(LABEL_hd)Downtown HD 0.31479 0.05734 5.490
## as.factor(LABEL_hd)Dupont Circle HD 0.69654 0.05578 12.487
## as.factor(LABEL_hd)Financial HD 0.59786 0.05726 10.441
## as.factor(LABEL_hd)Foggy Bottom HD 0.78872 0.05596 14.095
## as.factor(LABEL_hd)Foxhall HD 0.87223 0.05734 15.211
## as.factor(LABEL_hd)Greater 14th St HD 0.41183 0.05648 7.291
## as.factor(LABEL_hd)Greater U St HD 0.23244 0.05648 4.115
## as.factor(LABEL_hd)GWU / Old West End HD 0.81044 0.05852 13.848
## as.factor(LABEL_hd)Kalorama Triangle HD 0.66525 0.05596 11.888
## as.factor(LABEL_hd)Kingman Park HD 0.04983 0.05852 0.851
## as.factor(LABEL_hd)Lafayette Square HD 0.75342 0.05858 12.862
## as.factor(LABEL_hd)LeDroit Park HD 0.09812 0.05578 1.759
## as.factor(LABEL_hd)Logan Circle HD 0.36616 0.05578 6.564
## as.factor(LABEL_hd)Massachusetts Ave HD 0.76664 0.05578 13.743
## as.factor(LABEL_hd)Meridian Hill 0.34118 0.05852 5.830
## as.factor(LABEL_hd)Mt. Pleasant HD 0.35597 0.05596 6.361
## as.factor(LABEL_hd)Mt. Vernon Square HD 0.18100 0.05648 3.205
## as.factor(LABEL_hd)Pennsylvania Ave NHS 0.39792 0.05648 7.045
## as.factor(LABEL_hd)Shaw HD 0.26453 0.05648 4.684
## as.factor(LABEL_hd)Sheridan-Kalorama HD 0.80853 0.05596 14.449
## as.factor(LABEL_hd)Sixteenth St HD 0.41875 0.05578 7.507
## as.factor(LABEL_hd)Strivers' Section HD 0.38082 0.05596 6.805
## as.factor(LABEL_hd)Takoma Park HD 0.24564 0.05596 4.390
## as.factor(LABEL_hd)Washington Heights HD 0.55117 0.05734 9.612
## as.factor(LABEL_hd)Woodley Park HD 0.80725 0.05648 14.292
## as.factor(year_hd)1980 -0.03912 0.02593 -1.509
## as.factor(year_hd)1990 -0.02383 0.02868 -0.831
## as.factor(year_hd)2000 -0.01792 0.03235 -0.554
## as.factor(year_hd)2010 0.09108 0.03406 2.674
## as.factor(year_hd)2020 0.05635 0.03643 1.547
## treated:desig_yet_hd 0.07479 0.02922 2.559
## Pr(>|t|)
## (Intercept) 0.22720
## treated 0.00188 **
## desig_yet_hd 0.47674
## as.factor(LABEL_hd)Blagden Alley/Naylor Court HD 1.26e-06 ***
## as.factor(LABEL_hd)Bloomingdale HD 0.01705 *
## as.factor(LABEL_hd)Capitol Hill HD 2.68e-12 ***
## as.factor(LABEL_hd)Cleveland Park HD < 2e-16 ***
## as.factor(LABEL_hd)Downtown HD 8.22e-08 ***
## as.factor(LABEL_hd)Dupont Circle HD < 2e-16 ***
## as.factor(LABEL_hd)Financial HD < 2e-16 ***
## as.factor(LABEL_hd)Foggy Bottom HD < 2e-16 ***
## as.factor(LABEL_hd)Foxhall HD < 2e-16 ***
## as.factor(LABEL_hd)Greater 14th St HD 2.44e-12 ***
## as.factor(LABEL_hd)Greater U St HD 4.93e-05 ***
## as.factor(LABEL_hd)GWU / Old West End HD < 2e-16 ***
## as.factor(LABEL_hd)Kalorama Triangle HD < 2e-16 ***
## as.factor(LABEL_hd)Kingman Park HD 0.39515
## as.factor(LABEL_hd)Lafayette Square HD < 2e-16 ***
## as.factor(LABEL_hd)LeDroit Park HD 0.07953 .
## as.factor(LABEL_hd)Logan Circle HD 2.12e-10 ***
## as.factor(LABEL_hd)Massachusetts Ave HD < 2e-16 ***
## as.factor(LABEL_hd)Meridian Hill 1.36e-08 ***
## as.factor(LABEL_hd)Mt. Pleasant HD 6.94e-10 ***
## as.factor(LABEL_hd)Mt. Vernon Square HD 0.00149 **
## as.factor(LABEL_hd)Pennsylvania Ave NHS 1.15e-11 ***
## as.factor(LABEL_hd)Shaw HD 4.18e-06 ***
## as.factor(LABEL_hd)Sheridan-Kalorama HD < 2e-16 ***
## as.factor(LABEL_hd)Sixteenth St HD 6.11e-13 ***
## as.factor(LABEL_hd)Strivers' Section HD 5.00e-11 ***
## as.factor(LABEL_hd)Takoma Park HD 1.55e-05 ***
## as.factor(LABEL_hd)Washington Heights HD < 2e-16 ***
## as.factor(LABEL_hd)Woodley Park HD < 2e-16 ***
## as.factor(year_hd)1980 0.13237
## as.factor(year_hd)1990 0.40671
## as.factor(year_hd)2000 0.57995
## as.factor(year_hd)2010 0.00788 **
## as.factor(year_hd)2020 0.12287
## treated:desig_yet_hd 0.01095 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1366 on 319 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8179, Adjusted R-squared: 0.7967
## F-statistic: 38.71 on 37 and 319 DF, p-value: < 2.2e-16
plot_ly(comp_df[comp_df$group=="black",] %>% arrange(LABEL_hd, year_hd),
x = ~year_hd,
y = ~percent,
linetype = ~as.factor(treatment_control),
hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
color = ~LABEL_hd, # Group lines by this variable
type = "scatter",
mode = "lines+markers") %>%
layout(title = "% black residents in each historic district areas before and after HD created")
plot_ly(comp_df[comp_df$group=="white",] %>% arrange(LABEL_hd, year_hd),
x = ~year_hd,
y = ~percent,
linetype = ~as.factor(treatment_control),
hovertext = ~paste0("Tract types: ", treatment_control, "\nDesignated yet: ", desig_yet_hd),
color = ~LABEL_hd, # Group lines by this variable
type = "scatter",
mode = "lines+markers") %>%
layout(title = "% white residents in each historic district areas before and after HD created")